R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Loading data from Excel into R

library(readxl)
library(readr)

#examplefile<-"C:/Users/sgartiyu/Desktop/R/multiTimelineActuaryvsDatascience.csv"
#input<-read_csv(examplefile)

#examplefilexl<-"C:/Users/sgartiyu/Desktop/R/multiTimelineActuaryvsDatascience.xlsx"
#inputexcel<-read_excel(examplefilexl,sheet="data")

ggplot2 examples

Example 1: Midwest data

library(ggplot2)
# Load `midwest` from the installed ggplot2 package. The original also fetched a
# CSV through a goo.gl short link first, but that copy was overwritten by this
# call straight away, so the download only added a network failure point.
data("midwest", package = "ggplot2")
options(scipen = 999)  # large penalty on scientific notation when printing numbers
theme_set(theme_bw())  # pre-set the bw theme.

# Scatterplot of total population against area: points coloured by state and
# sized by population density, with a loess smoother (no confidence band).
# xlim()/ylim() restrict both axes to the stated ranges.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(subtitle = "Area Vs Population",
       y = "Population",
       x = "Area",
       title = "Scatterplot",
       caption = "Source: midwest")

plot(gg)

Example 2: mpg data

http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html#top

# Load `mpg` from the installed ggplot2 package. The original first downloaded a
# CSV through a goo.gl short link, but the result was overwritten by this call,
# so the download is dropped.
data(mpg, package = "ggplot2")

# Stacked bar chart: number of rows per manufacturer, filled by vehicle class.
g <- ggplot(mpg, aes(manufacturer))
g + geom_bar(aes(fill = class), width = 0.5) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(title = "Histogram on Categorical Variable",
       subtitle = "Manufacturer across Vehicle Classes")

# Box plot of city mileage per vehicle class; varwidth = TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
g <- ggplot(mpg, aes(class, cty))
g + geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(title = "Box plot",
       subtitle = "City Mileage grouped by Class of vehicle",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

leaflet Example

Example 1 : Energy Production Data https://cran.r-project.org/web/packages/leaflet.minicharts/vignettes/introduction.html

library(leaflet)
library(leaflet.minicharts)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build one row per area holding 2016 sums, used by the minichart maps below.
# NOTE(review): `eco2mix` is not defined in this document; presumably it is the
# dataset shipped with leaflet.minicharts -- confirm.
# NOTE(review): summarise_all() is superseded in dplyr >= 1.0 by
# summarise(across(...)); left unchanged here to stay compatible with the
# package versions this document was knitted with.
prod2016 <- eco2mix %>%
  mutate(
    # renewable: sum of the four listed sources; non_renewable: rest of `total`
    renewable = bioenergy + solar + wind + hydraulic,
    non_renewable = total - bioenergy - solar - wind - hydraulic
  ) %>%
  # keep rows whose `month` contains "2016" and whose area is not "France"
  filter(grepl("2016", month) & area != "France") %>%
  select(-month) %>%
  # sum every remaining column within each (area, lat, lng) group
  group_by(area, lat, lng) %>%
  summarise_all(sum) %>%
  ungroup()

# Esri "World Light Gray Base" tiles. Requested over HTTPS so the tiles also
# load when the knitted HTML is served from an HTTPS page (browsers block plain
# http:// tile requests there as mixed content).
tilesURL <- "https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}"

# Shared base map reused by the three charts below.
basemap <- leaflet(width = "100%", height = "400px") %>%
  addTiles(tilesURL)


# Map 1: one pie per area showing renewable vs non-renewable production.
# Chart width scales with sqrt(total) and is 60 for the largest total.
colors <- c("#4fc13c", "#cccccc")

basemap %>%
  addMinicharts(
    prod2016$lng, prod2016$lat,
    type = "pie",
    chartdata = prod2016[, c("renewable", "non_renewable")],
    colorPalette = colors,
    width = 60 * sqrt(prod2016$total) / sqrt(max(prod2016$total)),
    transitionTime = 0
  )

# Map 2: hydraulic / solar / wind per area, default chart type, fixed 45x45 size.
renewable2016 <- prod2016 %>% select(hydraulic, solar, wind)
colors <- c("#3093e5", "#fcba50", "#a0d9e8")
basemap %>%
  addMinicharts(
    prod2016$lng, prod2016$lat,
    chartdata = renewable2016,
    colorPalette = colors,
    width = 45, height = 45
  )

# Map 3: the single `load` column per area, drawn with value labels.
basemap %>%
  addMinicharts(
    prod2016$lng, prod2016$lat,
    chartdata = prod2016$load,
    showLabels = TRUE,
    width = 45
  )

Example 2 : Leaflet vs GoogleVis for Earthquake data

https://rawgit.com/mages/GIRO2012/master/Using_R_in_Insurance_GIRO_2012.html

library(XML)
library(googleVis)
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
## 
## Welcome to googleVis version 0.6.4
## 
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
## 
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
## 
## See the googleVis package vignettes for more details,
## or visit https://github.com/mages/googleVis.
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
## Source data directly from the web
url <- "http://ds.iris.edu/sm2/eventlist/"
# Parse the HTML tables on the page and keep the one with id "evTable".
eq <- readHTMLTable(readLines(url),
                    colClasses = c("factor", rep("numeric", 4), "factor"))$evTable
# `$evTable` is NULL when the page has no table with that id; fail with a clear
# message instead of the obscure error the names<- call below would raise.
if (is.null(eq)) {
  stop("No table with id 'evTable' found at ", url, call. = FALSE)
}
names(eq) <- c("DATE", "LAT", "LON", "MAG",
               "DEPTH", "LOCATION_NAME", "IRIS_ID")
## Format location data as "LAT:LON"; used as the location variable of the
## geo chart further down.
eq$loc <- paste(eq$LAT, eq$LON, sep = ":")
summary(eq)
##                    DATE          LAT              LON         
##  01-NOV-2019 00:17:21:  1   Min.   :-62.85   Min.   :-179.78  
##  01-NOV-2019 00:30:08:  1   1st Qu.:-15.98   1st Qu.: -69.34  
##  01-NOV-2019 02:32:42:  1   Median :  1.44   Median : 111.36  
##  01-NOV-2019 02:34:01:  1   Mean   :  3.21   Mean   :  43.90  
##  01-NOV-2019 02:51:52:  1   3rd Qu.: 19.32   3rd Qu.: 128.96  
##  01-NOV-2019 04:14:12:  1   Max.   : 79.90   Max.   : 179.78  
##  (Other)             :797                                     
##       MAG            DEPTH                      LOCATION_NAME
##  Min.   :4.000   Min.   :  3.00   NORTHERN MOLUCCA SEA : 60  
##  1st Qu.:4.300   1st Qu.: 10.00   MINDANAO, PHILIPPINES: 47  
##  Median :4.600   Median : 35.00   FIJI ISLANDS REGION  : 36  
##  Mean   :4.636   Mean   : 88.23   IRIAN JAYA, INDONESIA: 27  
##  3rd Qu.:4.900   3rd Qu.:100.50   SOUTHERN IRAN        : 21  
##  Max.   :7.100   Max.   :643.00   TONGA ISLANDS REGION : 21  
##                                   (Other)              :591  
##      IRIS_ID        loc           
##  11133216:  1   Length:803        
##  11133251:  1   Class :character  
##  11133272:  1   Mode  :character  
##  11133313:  1                     
##  11133325:  1                     
##  11133332:  1                     
##  (Other) :797
# Display earthquake information of last 30 days

## Create a geo chart with the Google Chart API:
## markers are placed at `loc`, coloured by DEPTH and sized by MAG.
G <- gvisGeoChart(
  eq,
  locationvar = "loc",
  colorvar = "DEPTH",
  sizevar = "MAG",
  options = list(
    displayMode = "Markers",
    colorAxis = "{colors:['purple', 'red', 'orange', 'grey']}",
    backgroundColor = "lightblue"
  ),
  chartid = "EQ"
)

plot(G)
## starting httpd help server ...
##  done
# Use leaflet to plot the same earthquakes.
# Tiles are requested over HTTPS so they also load when the knitted HTML is
# served from an HTTPS page (plain http:// tiles are blocked as mixed content).
tilesURL <- "https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}"

basemap <- leaflet() %>%
  addTiles(tilesURL)

# One small chart per event at (LON, LAT), showing MAG with its value label.
basemap %>%
  addMinicharts(
    eq$LON, eq$LAT,
    chartdata = eq$MAG,
    showLabels = TRUE,
    width = 10
  )

R Plotly example

library(gsl)
library(mbbefd)
## Loading required package: fitdistrplus
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: survival
## Loading required package: npsurv
## Loading required package: lsei
## Loading required package: alabama
## Loading required package: numDeriv
## Loading required package: Rcpp
## Package:  mbbefd
## Version:  0.8.8.5
## Date:     2019-01-02 11:50:03 UTC
## BugReport: http://github.com/spedygiorgio/mbbefd/issues
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Grid of x values from 0 to 1 in steps of 0.001.
TIVpct <- seq(0, 1, 0.001)
# ecMBBEFD() evaluated on the grid with the g and b parameters that swissRe()
# returns for curves 1, 2 and 3.
trace_1 <- ecMBBEFD(TIVpct, swissRe(1)[["g"]], swissRe(1)[["b"]])
trace_2 <- ecMBBEFD(TIVpct, swissRe(2)[["g"]], swissRe(2)[["b"]])
trace_3 <- ecMBBEFD(TIVpct, swissRe(3)[["g"]], swissRe(3)[["b"]])
data <- data.frame(TIVpct, trace_1, trace_2, trace_3)

# Map columns with formulas (~col) so the traces are read from `data` itself;
# the original passed `data` but then plotted the global vectors, leaving the
# data argument unused.
plot_ly(data, x = ~TIVpct, y = ~trace_1, name = "SwissRe1",
        type = "scatter", mode = "lines") %>%
  add_trace(y = ~trace_2, name = "SwissRe2", mode = "lines") %>%
  add_trace(y = ~trace_3, name = "SwissRe3", mode = "lines")

GLM Generalized Linear Model example

Adapted from the tutorial at https://www.guru99.com/r-generalized-linear-model.html

# Read the adult income data. stringsAsFactors = TRUE is set explicitly: since
# R 4.0.0 read.csv() keeps strings as character by default, and the later steps
# in this section (integer coding for the correlation plot, binomial glm on
# `income`) need the text columns to be factors.
traindata <- read.csv(
  "https://raw.githubusercontent.com/guru99-edu/R-Programming/master/adult.csv",
  stringsAsFactors = TRUE
)
head(traindata, 10)
##     x age        workclass    education educational.num     marital.status
## 1   1  25          Private         11th               7      Never-married
## 2   2  38          Private      HS-grad               9 Married-civ-spouse
## 3   3  28        Local-gov   Assoc-acdm              12 Married-civ-spouse
## 4   4  44          Private Some-college              10 Married-civ-spouse
## 5   5  18                ? Some-college              10      Never-married
## 6   6  34          Private         10th               6      Never-married
## 7   7  29                ?      HS-grad               9      Never-married
## 8   8  63 Self-emp-not-inc  Prof-school              15 Married-civ-spouse
## 9   9  24          Private Some-college              10      Never-married
## 10 10  55          Private      7th-8th               4 Married-civ-spouse
##     race gender hours.per.week income
## 1  Black   Male             40  <=50K
## 2  White   Male             50  <=50K
## 3  White   Male             40   >50K
## 4  Black   Male             40   >50K
## 5  White Female             30  <=50K
## 6  White   Male             30  <=50K
## 7  Black   Male             40  <=50K
## 8  White   Male             32   >50K
## 9  White Female             40  <=50K
## 10 White   Male             10  <=50K
# List the column names of the training data.
names(traindata)
##  [1] "x"               "age"             "workclass"      
##  [4] "education"       "educational.num" "marital.status" 
##  [7] "race"            "gender"          "hours.per.week" 
## [10] "income"
# Share of each income level within every gender (bars rescaled to 100%).
ggplot(data = traindata, mapping = aes(gender, fill = income)) +
  geom_bar(position = "fill") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90))

# Same view, split by marital status instead of gender.
ggplot(data = traindata, mapping = aes(marital.status, fill = income)) +
  geom_bar(position = "fill") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90))

# Weekly hours by gender as box plots, with the group mean overlaid as a point.
# NOTE(review): `fun.y` is deprecated in newer ggplot2 releases in favour of
# `fun`; kept as-is so the chunk still runs with the older version this
# document was knitted with -- switch once the minimum ggplot2 is confirmed.
ggplot(data = traindata, mapping = aes(gender, hours.per.week)) +
  geom_boxplot() +
  stat_summary(
    fun.y = mean,
    geom = "point",
    size = 3,
    color = "steelblue"
  ) +
  theme_classic()

library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(dplyr)
# Convert data to numeric: every column becomes an integer vector.
# Character columns are turned into factors first so they map to level codes;
# calling as.integer() directly on text (what read.csv() returns by default on
# R >= 4.0) would yield NA for every value. Factor and numeric columns are
# coded exactly as before.
corr <- data.frame(lapply(traindata, function(column) {
  if (is.character(column)) {
    column <- factor(column)
  }
  as.integer(column)
}))
# Plot the graph: pairwise-complete Spearman correlations with value labels.
ggcorr(corr,
       method = c("pairwise", "spearman"),
       nbreaks = 6,
       hjust = 0.8,
       label = TRUE,
       label_size = 3,
       color = "grey50")

# Setting train and test datasets.
# - dplyr::select() is namespaced because MASS and plotly are attached after
#   dplyr earlier in this document and both export their own `select`.
# - `income` is made a factor explicitly: a character response (the read.csv()
#   default on R >= 4.0) makes glm(family = "binomial") fail.
recast_data <- traindata %>%
  dplyr::select(-x) %>%
  dplyr::mutate(income = factor(income))
# NOTE: the split defined below takes leading rows and draws no random numbers;
# the seed only matters for any stochastic step run afterwards.
set.seed(1234)
#' Split a data frame into a leading "train" part and a trailing "test" part
#'
#' The first `floor(size * nrow(data))` rows form the training set and the
#' remaining rows form the test set. Rows are taken in their existing order;
#' nothing is shuffled.
#'
#' @param data A data frame (or matrix) to split by rows.
#' @param size Fraction of rows assigned to the training set, between 0 and 1.
#' @param train If `TRUE` return the training rows, otherwise the test rows.
#' @return The selected rows of `data`. The result keeps its two-dimensional
#'   shape even when `data` has a single column.
create_train_test <- function(data, size = 0.8, train = TRUE) {
  stopifnot(
    is.numeric(size), length(size) == 1L, !is.na(size),
    size >= 0, size <= 1
  )
  n_row <- nrow(data)
  n_train <- floor(size * n_row)
  # A logical mask is used instead of 1:n_train / negative indexing:
  # 1:0 is c(1, 0) (which would select row 1), and data[-integer(0), ] selects
  # nothing rather than everything, so both branches broke for empty splits.
  is_train <- seq_len(n_row) <= n_train
  if (train) {
    data[is_train, , drop = FALSE]
  } else {
    data[!is_train, , drop = FALSE]
  }
}
# Materialise both partitions of recast_data with the same 0.8 fraction.
data_train <- create_train_test(data = recast_data, size = 0.8, train = TRUE)
data_test <- create_train_test(data = recast_data, size = 0.8, train = FALSE)
dim(data_train)
## [1] 39073     9
# Logistic regression of income on every other column of the training set.
formula <- income ~ .
logit <- glm(formula = formula, family = "binomial", data = data_train)
summary(logit)
## 
## Call:
## glm(formula = formula, family = "binomial", data = data_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6812  -0.5725  -0.2569  -0.0760   3.2066  
## 
## Coefficients: (1 not defined because of singularities)
##                                      Estimate Std. Error z value
## (Intercept)                         -7.804801   0.262054 -29.783
## age                                  0.029258   0.001355  21.590
## workclassFederal-gov                 1.394048   0.116969  11.918
## workclassLocal-gov                   0.780767   0.104153   7.496
## workclassNever-worked               -5.722083  70.726167  -0.081
## workclassPrivate                     0.876346   0.091215   9.607
## workclassSelf-emp-inc                1.325356   0.112010  11.832
## workclassSelf-emp-not-inc            0.286969   0.101262   2.834
## workclassState-gov                   0.587134   0.114957   5.107
## workclassWithout-pay                 0.318163   0.855032   0.372
## education11th                        0.158776   0.180589   0.879
## education12th                        0.528823   0.226839   2.331
## education1st-4th                    -0.966682   0.414658  -2.331
## education5th-6th                    -0.421754   0.261480  -1.613
## education7th-8th                    -0.379423   0.199574  -1.901
## education9th                        -0.387995   0.232483  -1.669
## educationAssoc-acdm                  1.890430   0.151964  12.440
## educationAssoc-voc                   1.702838   0.146993  11.584
## educationBachelors                   2.605945   0.135557  19.224
## educationDoctorate                   3.691513   0.181594  20.328
## educationHS-grad                     1.033916   0.134069   7.712
## educationMasters                     3.089361   0.142670  21.654
## educationPreschool                  -1.239897   1.030867  -1.203
## educationProf-school                 3.711683   0.170324  21.792
## educationSome-college                1.510526   0.135466  11.151
## educational.num                            NA         NA      NA
## marital.statusMarried-AF-spouse      2.329443   0.445473   5.229
## marital.statusMarried-civ-spouse     2.141602   0.056389  37.979
## marital.statusMarried-spouse-absent  0.157250   0.169541   0.928
## marital.statusNever-married         -0.415895   0.068571  -6.065
## marital.statusSeparated             -0.048994   0.133401  -0.367
## marital.statusWidowed               -0.091912   0.125694  -0.731
## raceAsian-Pac-Islander               0.109641   0.198581   0.552
## raceBlack                            0.116996   0.188891   0.619
## raceOther                            0.022018   0.269879   0.082
## raceWhite                            0.391186   0.180182   2.171
## genderMale                           0.089880   0.041729   2.154
## hours.per.week                       0.030004   0.001330  22.559
##                                                 Pr(>|z|)    
## (Intercept)                         < 0.0000000000000002 ***
## age                                 < 0.0000000000000002 ***
## workclassFederal-gov                < 0.0000000000000002 ***
## workclassLocal-gov                    0.0000000000000656 ***
## workclassNever-worked                             0.9355    
## workclassPrivate                    < 0.0000000000000002 ***
## workclassSelf-emp-inc               < 0.0000000000000002 ***
## workclassSelf-emp-not-inc                         0.0046 ** 
## workclassState-gov                    0.0000003265562993 ***
## workclassWithout-pay                              0.7098    
## education11th                                     0.3793    
## education12th                                     0.0197 *  
## education1st-4th                                  0.0197 *  
## education5th-6th                                  0.1068    
## education7th-8th                                  0.0573 .  
## education9th                                      0.0951 .  
## educationAssoc-acdm                 < 0.0000000000000002 ***
## educationAssoc-voc                  < 0.0000000000000002 ***
## educationBachelors                  < 0.0000000000000002 ***
## educationDoctorate                  < 0.0000000000000002 ***
## educationHS-grad                      0.0000000000000124 ***
## educationMasters                    < 0.0000000000000002 ***
## educationPreschool                                0.2291    
## educationProf-school                < 0.0000000000000002 ***
## educationSome-college               < 0.0000000000000002 ***
## educational.num                                       NA    
## marital.statusMarried-AF-spouse       0.0000001702923404 ***
## marital.statusMarried-civ-spouse    < 0.0000000000000002 ***
## marital.statusMarried-spouse-absent               0.3537    
## marital.statusNever-married           0.0000000013184768 ***
## marital.statusSeparated                           0.7134    
## marital.statusWidowed                             0.4646    
## raceAsian-Pac-Islander                            0.5809    
## raceBlack                                         0.5357    
## raceOther                                         0.9350    
## raceWhite                                         0.0299 *  
## genderMale                                        0.0312 *  
## hours.per.week                      < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42846  on 39072  degrees of freedom
## Residual deviance: 28488  on 39036  degrees of freedom
## AIC: 28562
## 
## Number of Fisher Scoring iterations: 10
# Predicted probabilities on the held-out rows. The result is stored under its
# own name rather than `predict`, which would shadow the predict() generic.
pred_prob <- predict(logit, data_test, type = "response")

# confusion matrix: actual income (rows) vs. predicted probability > 0.5
# (columns). Both FALSE and TRUE levels are fixed so the table stays 2 x 2 even
# if every prediction falls on one side of the threshold; otherwise diag()
# below would pick the wrong cells.
table_mat <- table(
  data_test$income,
  factor(pred_prob > 0.5, levels = c(FALSE, TRUE))
)
table_mat
##        
##         FALSE TRUE
##   <=50K  6861  504
##   >50K   1144 1260
# accuracy Test: share of observations on the diagonal of the confusion matrix
accuracy_Test <- sum(diag(table_mat)) / sum(table_mat)
accuracy_Test
## [1] 0.8313031

ChainLadder Example

https://gist.github.com/mages/3687713/659b2826d429823ff4ddb139d4d1bf46fe794dac https://rawgit.com/mages/GIRO2012/master/Using_R_in_Insurance_GIRO_2012.html

library(ChainLadder)
## 
## Welcome to ChainLadder version 0.2.10
## 
## Type vignette('ChainLadder', package='ChainLadder') to access
## the overall package documentation.
## 
## See demo(package='ChainLadder') for a list of demos.
## 
## More information is available on the ChainLadder project web-site:
## https://github.com/mages/ChainLadder
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(ChainLadder))
library(googleVis)

# Print the RAA triangle (not defined in this document; presumably a dataset
# made available by attaching ChainLadder -- confirm).
RAA
##       dev
## origin    1     2     3     4     5     6     7     8     9    10
##   1981 5012  8269 10907 11805 13539 16181 18009 18608 18662 18834
##   1982  106  4285  5396 10666 13782 15599 15496 16169 16704    NA
##   1983 3410  8992 13873 16141 18735 22214 22863 23466    NA    NA
##   1984 5655 11555 15766 21266 23425 26083 27067    NA    NA    NA
##   1985 1092  9565 15836 22169 25955 26180    NA    NA    NA    NA
##   1986 1513  6445 11702 12935 15852    NA    NA    NA    NA    NA
##   1987  557  4020 10946 12314    NA    NA    NA    NA    NA    NA
##   1988 1351  6947 13112    NA    NA    NA    NA    NA    NA    NA
##   1989 3133  5395    NA    NA    NA    NA    NA    NA    NA    NA
##   1990 2063    NA    NA    NA    NA    NA    NA    NA    NA    NA
# Fit the Mack chain-ladder model to the RAA triangle, draw its diagnostic
# plots, then print the fitted summary.
MCL <- MackChainLadder(Triangle = RAA)
plot(MCL)

MCL
## MackChainLadder(Triangle = RAA)
## 
##      Latest Dev.To.Date Ultimate   IBNR Mack.S.E CV(IBNR)
## 1981 18,834       1.000   18,834      0        0      NaN
## 1982 16,704       0.991   16,858    154      143    0.928
## 1983 23,466       0.974   24,083    617      592    0.959
## 1984 27,067       0.943   28,703  1,636      713    0.436
## 1985 26,180       0.905   28,927  2,747    1,452    0.529
## 1986 15,852       0.813   19,501  3,649    1,995    0.547
## 1987 12,314       0.694   17,749  5,435    2,204    0.405
## 1988 13,112       0.546   24,019 10,907    5,354    0.491
## 1989  5,395       0.336   16,045 10,650    6,332    0.595
## 1990  2,063       0.112   18,402 16,339   24,566    1.503
## 
##               Totals
## Latest:   160,987.00
## Dev:            0.76
## Ultimate: 213,122.23
## IBNR:      52,135.23
## Mack.S.E   26,880.74
## CV(IBNR):       0.52
# Inspect the class of the RAA object.
class(RAA)
## [1] "triangle" "matrix"
# Long-format copy of the triangle (columns: origin, dev, value).
df <- as.data.frame(RAA)
names(df)
## [1] "origin" "dev"    "value"
# One line per origin: value / 1000 against dev.
ggplot(
  data = df,
  mapping = aes(dev, value / 1000, colour = origin, group = origin)
) +
  geom_line()

# Load the GenIns triangle, relabel its origin dimension as 2002..2011 and
# rescale the values by 1/1000, rounded to whole numbers.
data(GenIns)
dimnames(GenIns)$origin <- 2002:2011
GenIns <- round(GenIns / 1000, 0)
# Long-format copy used for the development chart below.
df2 <- as.data.frame(GenIns)
# Fit the Mack chain-ladder model on the rescaled triangle, plot and print it.
MCL <- MackChainLadder(GenIns)
plot(MCL)

MCL
## MackChainLadder(Triangle = GenIns)
## 
##      Latest Dev.To.Date Ultimate    IBNR Mack.S.E CV(IBNR)
## 2002  3,901      1.0000    3,901     0.0      0.0      NaN
## 2003  5,339      0.9828    5,432    93.3     71.4    0.765
## 2004  4,909      0.9129    5,378   468.6    118.4    0.253
## 2005  4,588      0.8662    5,297   708.5    130.5    0.184
## 2006  3,873      0.7975    4,857   983.7    260.3    0.265
## 2007  3,692      0.7225    5,110 1,418.1    409.9    0.289
## 2008  3,483      0.6154    5,659 2,176.5    557.3    0.256
## 2009  2,864      0.4223    6,782 3,918.0    873.9    0.223
## 2010  1,363      0.2417    5,640 4,277.3    970.4    0.227
## 2011    344      0.0693    4,967 4,623.3  1,360.9    0.294
## 
##              Totals
## Latest:   34,356.00
## Dev:           0.65
## Ultimate: 53,023.29
## IBNR:     18,667.29
## Mack.S.E   2,437.59
## CV(IBNR):      0.13
# Development chart: one line (with points) per origin, value against dev.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept so the chunk behaves the same on the older version this
# document was knitted with.
ggplot(
  data = df2,
  mapping = aes(dev, value, colour = origin, group = origin)
) +
  geom_line(size = 1) +
  geom_point() +
  labs(
    title = "Chart 1: Reserving Incurred Development Patterns",
    x = "Development Year",
    y = "Incurred Amount"
  )